/*************************************************************************
 *
 * Hitchhiker's Guide to the IBM PE 
 * Multi threaded Ray trace program
 * Chapter 3 - Don't Panic
 *
 * To compile:
 * mpcc_r -g -o rtrace_thread rtrace_thread.c
 *
 *
 * Description: 
 * This is a sample program that partitions N tasks into
 * two groups, a collect node and N - 1 compute nodes.
 * In addition, each of the compute nodes splits into two
 * threads: one handles the computation, the other handles
 * the message passing.
 * The responsibility of the collect node is to collect the data
 * generated by the compute nodes. The compute nodes send the 
 * results of their work to the collect node for collection.
 *
 * In addition to being multi threaded itself this program uses
 * the threaded version of MPI libraries (required).
 *
 *******************************************************************/

#include <stdio.h>
#include <string.h>

#include <mpi.h>
#include <pthread.h>

 /******************************************************************/

#define PIXEL_WIDTH	100
#define PIXEL_HEIGHT	100

/*
 * Index of the most recent row this task has finished computing, or -1
 * while no row is ready yet.  Written by compute_thread() and read by
 * message_thread(); both touch it only while holding my_mutex.
 */
int Available_to_send = -1;

/*
 * The image, indexed as Pixel_Data[row][col]: PIXEL_HEIGHT rows of
 * PIXEL_WIDTH pixels each, so the first dimension is the row count and
 * one row is a contiguous run of PIXEL_WIDTH ints.
 */
int Pixel_Data[PIXEL_HEIGHT][PIXEL_WIDTH];


/* my_mutex guards Available_to_send; my_cond is signalled when it advances. */
pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t  my_cond  = PTHREAD_COND_INITIALIZER;


/* Thread bodies used by every compute task (rank > 0). */
void *compute_thread(void * message);
void *message_thread(void * message);

/* Receive loop used by the collect task (rank 0). */
void  collect_pixels(void);

 /******************************************************************/

void main(int argc, char *argv[])
{ 
	int	taskid;
 
	/* Find out number of tasks/nodes. */
	MPI_Init( &argc, &argv);
	MPI_Comm_rank( MPI_COMM_WORLD, &taskid);

	/* if this is the collect node */
      	if ( taskid == 0 )
	{
                /* the collect node has no need to b (user) threaded */
	   	collect_pixels();

	} else {

        	pthread_t 	send_thread;
		pthread_attr_t 	attr;

	        printf("Compute task #%d: checking in\n", taskid);

		pthread_attr_init(&attr);

		pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_UNDETACHED);

		/* Let's put the computation on another thread */
		pthread_create(&send_thread, &attr, compute_thread, NULL);

		/* Let's send messages on this thread */
		message_thread(NULL);

		/* Let's get back together when we are done */
		pthread_join(send_thread,NULL);

		printf("Compute #%d: done sending. ", taskid);

		pthread_attr_destroy(&attr);
	}

	printf("Task %d waiting to complete.\n", taskid);

	MPI_Barrier(MPI_COMM_WORLD);

	printf("Task %d complete.\n",taskid);

	MPI_Finalize();

	exit(); 
}

 /******************************************************************/
 /*
    The idea is that as each row is computed the "available to send"
    bar is moved. (This allows the message thread to send it)  
 */
 /******************************************************************/

void *compute_thread(void * message)
{
	int		offset;
	int		row, col;
	int		first_line;
      	int    		numtask;
	int		taskid;

	MPI_Comm_size( MPI_COMM_WORLD, &numtask);
	MPI_Comm_rank( MPI_COMM_WORLD, &taskid);

        offset 		= numtask - 1;
	first_line 	= taskid  - 1;

	for (row = first_line; row < PIXEL_HEIGHT; row += offset)
	{
		for ( col = 0; col < PIXEL_HEIGHT; col++)
		{
			/* here is where the a real computation should be */
			/* for our example we will just initialize thedata  */
			/* with something */

			Pixel_Data[row][col] = row + col; 
		}

		printf("done computing row %d\n", row);

                pthread_mutex_lock(&my_mutex);

                /* raise the bar to let the message thread do its work */
		Available_to_send = row;

                pthread_cond_signal(&my_cond);

                pthread_mutex_unlock(&my_mutex);
	}

	return;
}


 /******************************************************************/
 /*
    The idea here is to wait until the (calculated) row line is ready
    to send. 
 */
 /******************************************************************/

/*
 * Message-passing thread body for a compute node.
 *
 * Walks the same rows as compute_thread() (starting at taskid - 1,
 * stepping by numtask - 1).  For each row it blocks on my_cond until
 * Available_to_send has reached that row, then sends the row to rank 0
 * as PIXEL_WIDTH + 1 ints: the row number followed by the pixel values.
 *
 * message: unused.
 * Returns NULL.
 */
void *message_thread(void * message)
{
	int		pixel_row[PIXEL_WIDTH + 1];
	int		numtask;
	int		taskid;
	int		offset;
	int		first_line;
	int		row;

	(void)message;

	MPI_Comm_size(MPI_COMM_WORLD, &numtask);
	MPI_Comm_rank(MPI_COMM_WORLD, &taskid);

	offset		= numtask - 1;
	first_line	= taskid  - 1;

	for (row = first_line; row < PIXEL_HEIGHT; row += offset)
	{
		pthread_mutex_lock(&my_mutex);

		/* wait until this row has been computed */
		while (Available_to_send < row)
			pthread_cond_wait(&my_cond, &my_mutex);

		pthread_mutex_unlock(&my_mutex);

		printf(" sending row %d\n", row);

		/* row number goes in cell 0 */
		pixel_row[0] = row;

		/*
		 * the rest of the data goes in cells 1..PIXEL_WIDTH; the
		 * length is in bytes, so scale the pixel count by the
		 * element size
		 */
		memcpy(&pixel_row[1], Pixel_Data[row],
			PIXEL_WIDTH * sizeof pixel_row[0]);

		MPI_Send(pixel_row, PIXEL_WIDTH + 1, MPI_INT, 0, 0,
			MPI_COMM_WORLD);
	}

	printf("Compute #%d: done sending. ", taskid);

	return NULL;
}

 /******************************************************************/

/*
 * Receive loop for the collect node (rank 0).
 *
 * Receives PIXEL_HEIGHT messages from any source, one per image row,
 * and prints the row number carried in cell 0 of each.  The pixel data
 * itself is received but not stored.
 *
 * If the job has no compute tasks (fewer than two tasks in total)
 * there is nobody to send rows, so the function reports that and
 * returns instead of blocking forever in MPI_Recv.
 */
void  collect_pixels(void)
{
	int		pixel_row[PIXEL_WIDTH + 1];
	MPI_Status	stat;
	int		remaining;
	int		numtask;
	int		taskid;

	MPI_Comm_size(MPI_COMM_WORLD, &numtask);
	MPI_Comm_rank(MPI_COMM_WORLD, &taskid);

	printf("Control #%d: No. of nodes used is %d\n",
			taskid, numtask);

	if (numtask < 2)
	{
		fprintf(stderr,
			"Control: no compute tasks available; "
			"nothing to receive\n");
		return;
	}

	printf("Control: expect to receive %d messages\n", PIXEL_HEIGHT);

	/* one message per image row, in whatever order they arrive */
	for (remaining = PIXEL_HEIGHT; remaining > 0; remaining--)
	{
		MPI_Recv(pixel_row, PIXEL_WIDTH + 1, MPI_INT, MPI_ANY_SOURCE,
			MPI_ANY_TAG, MPI_COMM_WORLD, &stat);

		printf("received row %d\n", pixel_row[0]);
	}

	printf("Control node #%d: done receiving. ", taskid);
}

 /******************************************************************/
